First let’s load some packages:

library(phenocamapi)
## Loading required package: data.table
## Loading required package: rjson
## Loading required package: RCurl
## Loading required package: bitops
library(plotly)
## Loading required package: ggplot2
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(phenocamr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Let’s start by pulling in a list of sites that have AmeriFlux towers and PhenoCams via the phenocamapi package:

phenos=get_phenos()
#and let's check the column names:
colnames(phenos)
##  [1] "site"                      "lat"                      
##  [3] "lon"                       "elev"                     
##  [5] "active"                    "utc_offset"               
##  [7] "date_first"                "date_last"                
##  [9] "infrared"                  "contact1"                 
## [11] "contact2"                  "site_description"         
## [13] "site_type"                 "group"                    
## [15] "camera_description"        "camera_orientation"       
## [17] "flux_data"                 "flux_networks"            
## [19] "flux_sitenames"            "dominant_species"         
## [21] "primary_veg_type"          "secondary_veg_type"       
## [23] "site_meteorology"          "MAT_site"                 
## [25] "MAP_site"                  "MAT_daymet"               
## [27] "MAP_daymet"                "MAT_worldclim"            
## [29] "MAP_worldclim"             "koeppen_geiger"           
## [31] "ecoregion"                 "landcover_igbp"           
## [33] "dataset_version1"          "site_acknowledgements"    
## [35] "modified"                  "flux_networks_name"       
## [37] "flux_networks_url"         "flux_networks_description"

Notice that ‘flux_data’ (true-false), ‘flux_networks’, and ‘flux_sitenames’ are all variables that you can either filter by or retain the info of. Let’s grab all Ameriflux sites where we have the sitename stored:

phenos=phenos%>%
  filter(phenos$flux_data=='TRUE')%>%
filter(flux_networks_name=='AMERIFLUX')%>%
  filter(flux_sitenames!='NA')%>%
  select(flux_sitenames, site, date_first, date_last, site_description, primary_veg_type)
head(phenos)
##                       flux_sitenames                 site date_first
## 1                             US-NC4       alligatorriver 2012-05-03
## 2 US-Br1: Brooks Field Site 10- Ames          arsbrooks10 2019-07-10
## 3 US-Br3: Brooks Field Site 11- Ames          arsbrooks11 2019-07-01
## 4                             US-Rws arsgreatbasinltar098 2017-05-18
## 5                             US-Rms arsgreatbasinltar177 2018-10-17
## 6                             US-SP1           austincary 2016-01-11
##    date_last
## 1 2019-09-05
## 2 2019-09-05
## 3 2019-09-05
## 4 2019-09-02
## 5 2019-08-29
## 6 2019-09-05
##                                                                                site_description
## 1                                      Alligator River National Wildlife Refuge, North Carolina
## 2                                                              Brooks Field site 10, Ames, Iowa
## 3                                                              Brooks Field site 11, Ames, Iowa
## 4                                ARS, Great Basin LTAR, ARTRW8 community, Reynolds Creek, Idaho
## 5                           ARS, Great Basin LTAR, ARTRV/SYORU community, Reynolds Creek, Idaho
## 6 Austin Cary Forest, School of Forest Resources & Conservation, University of Florida, Florida
##   primary_veg_type
## 1               DB
## 2               AG
## 3               AG
## 4               SH
## 5               SH
## 6               EN

external data from phenocamapi package

Now let’s select two combination phenocam-flux tower sites from different plant functional types to explore (e.g. one grassland site and one evergreen needleleaf site)

#example
GrassSites=phenos%>%
  filter(phenos$primary_veg_type=='GR')
head(GrassSites) #just viewing the top 6 sites in the dataframe
##         flux_sitenames         site date_first  date_last
## 1 US-MTB (forthcoming)      bozeman 2015-08-16 2019-09-05
## 2               US-FR1 freemangrass 2012-03-13 2014-02-27
## 3               US-KFS       kansas 2012-03-17 2019-09-05
## 4               US-Wkg      kendall 2012-07-06 2019-09-05
## 5               US-Kon        konza 2012-03-17 2019-09-05
## 6               CA-Let   lethbridge 2011-12-07 2019-09-05
##                                                           site_description
## 1                   Bangtail Study Area, Montana State University, Montana
## 2 Grassland Site, Texas State University, Freeman Ranch, San Marcos, Texas
## 3                           KU Field Station, University of Kansas, Kansas
## 4                                               Kendall Grassland, Arizona
## 5        Konza Prairie Biological Station, Kansas State University, Kansas
## 6                 Lethbridge Grassland Ecosystem Site, Lethbridge, Alberta
##   primary_veg_type
## 1               GR
## 2               GR
## 3               GR
## 4               GR
## 5               GR
## 6               GR
EvergreenSites=phenos%>%
  filter(phenos$primary_veg_type=='EN')
head(EvergreenSites)
##   flux_sitenames                   site date_first  date_last
## 1         US-SP1             austincary 2016-01-11 2019-09-05
## 2         US-Ha2         harvardhemlock 2010-06-08 2019-09-05
## 3         US-Ha2        harvardhemlock2 2015-09-03 2019-09-05
## 4         US-HB3 hobcawclearcutlongleaf 2018-12-18 2019-09-05
## 5         US-HB2   hobcawmaturelongleaf 2018-12-17 2019-09-05
## 6         US-Ho1               howland1 2007-01-01 2019-09-05
##                                                                                site_description
## 1 Austin Cary Forest, School of Forest Resources & Conservation, University of Florida, Florida
## 2                                       Hemlock Tower, Harvard Forest, Petersham, Massachusetts
## 3                                             Hemlock Tower, Harvard Forest, Petersham, MA, USA
## 4                                                                 Hobcaw Longleaf Pine Clearcut
## 5                                                                   Hobcaw Mature Longleaf Pine
## 6                                     Main Tower (Mature stand), Howland Forest, Howland, Maine
##   primary_veg_type
## 1               EN
## 2               EN
## 3               EN
## 4               EN
## 5               EN
## 6               EN
FirstSite=GrassSites[5, ] #I randomly chose a site in the table
FirstSite
##   flux_sitenames  site date_first  date_last
## 5         US-Kon konza 2012-03-17 2019-09-05
##                                                    site_description
## 5 Konza Prairie Biological Station, Kansas State University, Kansas
##   primary_veg_type
## 5               GR
SecondSite=EvergreenSites[29,]
SecondSite
##    flux_sitenames     site date_first  date_last
## 29         US-Me2 oregonMP 2011-06-14 2019-09-05
##                                           site_description
## 29 Metolius intermediate pine/US-Me2, near Sisters, Oregon
##    primary_veg_type
## 29               EN

Chose your own sites and build out your code chunk here:

 #edit so that ameriflux folks can access their site

Koen Huffkens developed the phenocamr package, which streamlines access to quality controlled data. We will now use this package to download and process site based data according to a standardized methodology.

A full description of the methodology is provided in Scientific Data: Tracking vegetation phenology across diverse North American biomes using PhenoCam imagery (Richardson et al. 2018).

#uncomment if you need to install via devtools
#if(!require(devtools)){install.package(devtools)}
#devtools::install_github("khufkens/phenocamr")
library(phenocamr)

Use the dataframe you built to feed the phenocamr package. Note: you can choose either a daily or 3 day product

phenocamr::download_phenocam(
  frequency = 3,
  veg_type = FirstSite$primary_veg_type,
  roi_id = 1000,
  site = FirstSite$site,
  phenophase = TRUE,
  out_dir = "."
  )
## Downloading: konza_GR_1000_3day.csv
## -- Flagging outliers!
## -- Smoothing time series!
## -- Estimating transition dates!
phenocamr::download_phenocam(
  frequency = 3,
  veg_type = SecondSite$primary_veg_type,
  roi_id = 1000,
  site = SecondSite$site,
  phenophase = TRUE,
  out_dir = "."
  )
## Downloading: oregonMP_EN_1000_3day.csv
## -- Flagging outliers!
## -- Smoothing time series!
## -- Estimating transition dates!

Now look in your working directory. You have data! Read it in:

# load the time series data but replace the csv filename with whatever you downloaded
df <- read.table("konza_GR_1000_3day.csv", header = TRUE, sep = ",")

# read in the transition date file
td <- read.table("konza_GR_1000_3day_transition_dates.csv",
                 header = TRUE,
                 sep = ",")

Let’s take a look at the data:

p = plot_ly() %>%
  add_trace(
  data = df,
  x = ~ as.Date(date),
  y = ~ smooth_gcc_90,
  name = 'Smoothed GCC',
  showlegend = TRUE,
  type = 'scatter',
  mode = 'line'
) %>% add_markers(
  data=df,
  x ~ as.Date(date),
  y = ~gcc_90,
  name = 'GCC',
  type = 'scatter',
  color ='#07A4B5', 
  opacity=.5
)
p
## Warning: Ignoring 2186 observations
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

What patterns do you notice? How would we go about determining say the start of spring? (SOS)

Threshold values

Let’s subset the transition date (td) for each year when 25% of the greenness amplitude (of the 90^th) percentile is reached (threshold_25).

# select the rising (spring dates) for 25% threshold of Gcc 90
spring <- td[td$direction == "rising" & td$gcc_value == "gcc_90",]

Now let’s create a simple plot_ly line graph of the smooth Green Chromatic Coordinate (Gcc) and add points for transition dates:

p = plot_ly() %>%
  add_trace(
  data = df,
  x = ~ as.Date(date),
  y = ~ smooth_gcc_90,
  name = 'PhenoCam GCC',
  showlegend = TRUE,
  type = 'scatter',
  mode = 'line'
) %>% add_markers(
  data= spring, 
  x = ~ as.Date(spring$transition_25, origin = "1970-01-01"),
  y = ~ spring$threshold_25,
  type = 'scatter',
  mode = 'marker',
  name = 'Spring Dates')
                
p

Now we can see the transition date for each year of interest and the annual patterns of greenness.

However, if you want more control over the parameters used during processing, you can run through the three default processing steps as implemented in download_phenocam() and set parameters manually.

Of particular interest is the option to specify your own threshold used in determining transition dates.

What would be a reasonable threshold for peak greenness? Or autumn onset? Look at the ts dataset and phenocamr package and come up with a threshold. Use the same code to plot it here:

#print('code here')
#some hint code
#what does 'rising' versus 'falling' denote?
#what threshold should you choose?
#td <- phenophases("konza_GR_1000_3day.csv",
#            internal = TRUE,
#            upper_thresh = 0.8)
fall <- td[td$direction == "falling" & td$gcc_value == "gcc_90",]
#Now generate a fall dataframe, what metrics should you use?

Comparing phenology across vegetation types

Let’s load in a function to make plotting smoother. I’m dropped it here in the markdown so that you can edit it and re-run it as you see fit:

gcc_plot = function(gcc, spring, fall){
  unix = "1970-01-01"

  p = plot_ly(
    data = gcc,
    x = ~ date,
    y = ~ gcc_90,
    showlegend = FALSE,
    type = 'scatter',
    mode = 'markers'
  ) %>%
    add_trace(
      y = ~ smooth_gcc_90,
      mode = "lines",
      line = list(width = 2, color = "rgb(120,120,120)"),
      name = "Gcc loess fit",
      showlegend = TRUE
    ) %>%
    # SOS spring
    # 10%
    add_trace(
      data = spring,
      x = ~ as.Date(transition_10),
      y = ~ threshold_10,
      mode = "markers",
      type = "scatter",
      marker = list(color = "#7FFF00", symbol = "circle"),
      name = "SOS (10%)",
      showlegend = TRUE
    ) %>%
    add_segments(x = ~ as.Date(transition_10_lower_ci),
                 xend = ~ as.Date(transition_10_upper_ci),
                 # y = ~ 0,
                 # yend = ~ 1,
                 y = ~ threshold_10,
                 yend = ~ threshold_10,
                 line = list(color = "#7FFF00"),
                 name = "SOS (10%) - CI"
    ) %>%
    # 25 %
    add_trace(
      x = ~ as.Date(transition_25),
      y = ~ threshold_25,
      mode = "markers",
      type = "scatter",
      marker = list(color = "#66CD00", symbol = "square"),
      showlegend = TRUE,
      name = "SOS (25%)"
    ) %>%
    add_segments(x = ~ as.Date(transition_25_lower_ci),
                 xend = ~ as.Date(transition_25_upper_ci),
                 y = ~ threshold_25,
                 yend = ~ threshold_25,
                 line = list(color = "#66CD00"),
                 name = "SOS (25%) - CI"
    ) %>%
    # 50 %
    add_trace(
      x = ~ as.Date(transition_50),
      y = ~ threshold_50,
      mode = "markers",
      type = "scatter",
      marker = list(color = "#458B00", symbol = "diamond"),
      showlegend = TRUE,
      name = "SOS (50%)"
    ) %>%
    add_segments(x = ~ as.Date(transition_50_lower_ci),
                 xend = ~ as.Date(transition_50_upper_ci),
                 y = ~ threshold_50,
                 yend = ~ threshold_50,
                 line = list(color = "#458B00"),
                 name = "SOS (50%) - CI"
    ) %>%
    
    # EOS fall
    # 50%
    add_trace(
      data = fall,
      x = ~ as.Date(transition_50),
      y = ~ threshold_50,
      mode = "markers",
      type = "scatter",
      marker = list(color = "#FFB90F", symbol = "diamond"),
      showlegend = TRUE,
      name = "EOS (50%)"
    ) %>%
    add_segments(x = ~ as.Date(transition_50_lower_ci),
                 xend = ~ as.Date(transition_50_upper_ci),
                 y = ~ threshold_50,
                 yend = ~ threshold_50,
                 line = list(color = "#FFB90F"),
                 name = "EOS (50%) - CI"
    ) %>%
    # 25 %
    add_trace(
      x = ~ as.Date(transition_25),
      y = ~ threshold_25,
      mode = "markers",
      type = "scatter",
      marker = list(color = "#CD950C", symbol = "square"),
      showlegend = TRUE,
      name = "EOS (25%)"
    ) %>%
    add_segments(x = ~ as.Date(transition_25_lower_ci),
                 xend = ~ as.Date(transition_25_upper_ci),
                 y = ~ threshold_25,
                 yend = ~ threshold_25,
                 line = list(color = "#CD950C"),
                 name = "EOS (25%) - CI"
    ) %>%
    # 10 %
    add_trace(
      x = ~ as.Date(transition_10),
      y = ~ threshold_10,
      mode = "markers",
      marker = list(color = "#8B6508", symbol = "circle"),
      showlegend = TRUE,
      name = "EOS (10%)"
    ) %>%
    add_segments(x = ~ as.Date(transition_10_lower_ci),
                 xend = ~ as.Date(transition_10_upper_ci),
                 y = ~ threshold_10,
                 yend = ~ threshold_10,
                 line = list(color = "#8B6508"),
                 name = "EOS (10%) - CI"
    )
  return (p)
}
gcc_p = gcc_plot(df, spring, fall)
gcc_p
## Warning: Ignoring 2186 observations
## Warning: Can't display both discrete & non-discrete data on same axis

Now let’s look at inter-annual variation in spring onset. What is the difference in 25% greenness onset for your first site? #hint, look at the spring dataframe you just generated

#some hints to get you started
yr=spring$transition_25
yr=as.Date(yr)
yr
## [1] "2012-04-24" "2013-05-04" "2014-04-23" "2015-04-22" "2016-05-02"
## [6] "2017-04-21" "2018-05-01" "2019-05-01"
#pull out spring transition dates into separate columns
dates_split <- data.frame(date = yr,
                 year = as.numeric(format(yr, format = "%Y")),
                 month = as.numeric(format(yr, format = "%m")),
                 day = as.numeric(format(yr, format = "%d")))

#or track DOY changes
doy=as.Date(yr, format='%d%m%Y')
doy=lubridate::yday(doy)
doy
## [1] 115 124 113 112 123 111 121 121
doy=as.data.frame(doy)
spring_variation=cbind(yr, doy)
 p = plot_ly(
    data = spring_variation,
    x = ~ yr,
    y = ~ doy,
    showlegend = FALSE,
    type = 'scatter',
    mode = 'markers', 
    name = "Year"
  ) 
p

*** ###Comparing phenology of the same vegetation cover but across climate space

As Dr. Richardson mentioned this morning in his introduction lecture, the same plant functional types (e.g. grasslands) can have very different phenologogical cycles. Let’s pick two phenocam grassland sites: one from a tropical climate (kamuela), and one from an arid climate #edit

GrassSites=GrassSites[c(27,32),]

Now use the code you’ve generated above to pull in data from those sites:

#code here

Now let’s create a subplot of your grasslands to compare phenology, some hint code below:

#some hint code for subplotting in plot_ly:
#p <- subplot(p1, p2, nrows=2)
#p

Once you have a subplot of grassland phenology across 2 climates answer the following questions here in the markdown: 1. What seasonal patterns do you see? 2. Do you think you set your thresholds correctly for transition dates/phenophases? How might that very as a function of climate?


Flux Data Integration

Finally, let’s use Bijan’s package to pull in AmeriFlux data:

fluxfile <- system.file('fluxnetrepo/FLX_US-Me2/FLX_US-Me2_FULLSET_DD.csv', package = 'phenocamapi')

fluxts <- read.csv(fluxfile, skip = 0)
fluxts[fluxts==-9999] <- NA
fluxts <- as.data.table(fluxts)
fluxts[,datetime:=as.POSIXct(as.character(TIMESTAMP), format='%Y%m%d')]
fluxts[,YYYYMMDD:=as.character(as.Date(datetime))]
fluxts[,YEAR:=year(datetime)]
fluxts[,DOY:=yday(datetime)]

head(fluxts[, .(TIMESTAMP, TA_F)])
##    TIMESTAMP    TA_F
## 1:  20141115 -10.105
## 2:  20141116  -8.044
## 3:  20141117  -4.550
## 4:  20141118  -1.584
## 5:  20141119  -1.805
## 6:  20141120   4.019

Let’s read in the phenocam data for that fluxtower and plot it:

# load the time series data but replace the csv filename with whatever you downloaded
df <- read.table("oregonMP_EN_1000_3day.csv", header = TRUE, sep = ",")

# read in the transition date file
td <- read.table("oregonMP_EN_1000_3day_transition_dates.csv",
                 header = TRUE,
                 sep = ",")
spring <- td[td$direction == "rising" & td$gcc_value == "gcc_90",]
fall <- td[td$direction == "falling" & td$gcc_value == "gcc_90",]
gcc_p = gcc_plot(df, spring, fall)
gcc_p
## Warning: Ignoring 2212 observations
## Warning: Can't display both discrete & non-discrete data on same axis

Now let’s look at the flux data:

p = plot_ly(
    data = fluxts,
    x = ~ datetime,
    y = ~ GPP_DT_CUT_MEAN,
    showlegend = FALSE,
    type = 'scatter',
    mode = 'markers'
  ) 
p

We’ll need to filter the phenocam data to match up with the fluxtower subset:

sum(is.element(df$date, as.factor(fluxts$datetime)))
## [1] 46
sum(is.element(df$date, as.factor(fluxts$datetime)))
## [1] 46

Unfortunately our flux tower data is too short to see much of an overlap, but here’s where your data could come in! Below I’ve dropped some code to subplot fluxtower and phenocam data, if you have time during this workshop feel free to drag your data and code from above to complete a GPP/GCC comparison.

#some hint code for subplotting in plot_ly:
p1 = plot_ly(
    data = df,
    x = ~ date,
    y = ~ gcc_90,
    showlegend = FALSE,
    type = 'scatter',
    mode = 'markers'
  ) %>%
    add_trace(
      y = ~ smooth_gcc_90,
      mode = "lines",
      line = list(width = 2, color = "rgb(120,120,120)"),
      name = "Gcc loess fit",
      showlegend = TRUE
    )
p1
## Warning: Ignoring 2212 observations
#some hint code for subplotting in plot_ly:
#p <- subplot(p1, p2, nrows=2)
#p